We discusss reasons why modelling some variable or phonomenon as Gaussian is ok. - Ontological: As a category, Gaussian distributions are faily common in the wild. - Epistemological: If all we know is the average value of a variable, and a sense of the dispersion of the variable, then the Gaussian distribution is the most conservative distribution incorporating that information.
~ notation used to map variables to statistical distributions/ models.\[ P(\mu, \sigma | X) = \frac{P(X|\mu, \sigma) P(\mu, \sigma)}{P(X)}\] We’re modelling the data as Gaussian, meaning \(P(X|\mu, \sigma)\) = \(Normal(X=x_0| \mu, \sigma)\) We’re also silently asssuming that \(\mu\) and \(\sigma\) are independent, giving us \(P(\mu, \sigma)\) = \(P(\mu)*P(\sigma)\)
library(rethinking)
data <- data(Howell1)
d2 <- Howell1[Howell1$age >= 18,]
mu.list <- seq( from=150, to=160 , length.out=100 )
sigma.list <- seq( from=7 , to=9 , length.out=100 )
post <- expand.grid( mu=mu.list , sigma=sigma.list )
post$LL calculates the log likelihood of the data for every mu, sigma pair i.e log of P(X|mu, sigma)
post$LL <- sapply( 1:nrow(post) , function(i) sum(
dnorm( d2$height , post$mu[i] , post$sigma[i] , log=TRUE ) ) )
post$Prod is log of (Likelihood * prior ) log(likelihood) + log(prior)
post$prod <- post$LL + dnorm( post$mu , 178 , 20 , TRUE ) +
dunif( post$sigma , 0 , 50 , TRUE )
So, now each row contains the log of LikelihoodxPrior against the specific mu-sigma pair in that row. Now, to convert log into probability, we need to exponentiate it. But, since the logged figures are large negative numbers floating point arithmetic errors will simply send exp(large negative value) to zero. To avoid this, instead of exp(LL), we calculate exp(LL - max(LL)), LL - max(LL) will return a small positive value.
scale_constant <- max(post$prod)
post$prod2 <- exp(post$prod - scale_constant)
All that is now left is to re-scale the probabilities (the denominator term P(X)). We may naturally ask, shouldn’t we re-adjust the probabilities to offset the scaling factor? But we can be clever here. Notice that \(\exp(x_i-y) = \frac{\exp(x_i)}{\exp(y)} = \frac{\exp(x_i)}{\exp(\rm{scale-constant})} = \exp(x_i)*\lambda\) Giving us \(\frac{\exp(x_i - y)}{\sum_iexp(x_i -y)} = \frac{exp(x_i)*\lambda}{\sum_iexp(x_i)*\lambda} = \frac{exp(x_i)}{\sum_iexp(x_i)}\)
In short, no we don’t need to re-adjust to offset the scaling factor, leaving us with:
post$prob <- post$prod2/sum(post$prod2)
image_xyz( post$mu , post$sigma , post$prob )
We notice that the values for sigma are much more dispersed, than the values for mu. We can plot the posterior for mu and sigma to confirm this.
plot(post$mu, post$prob)
plot(post$sigma, post$prob)
Well, the posterior for mu looks Gausssian, the posterior for sigma has a thickeer and longer right tail.
sample.rows <- sample( 1:nrow(post) , size=1e4 , replace=TRUE ,
prob=post$prod2 )
sample.mu <- post$mu[ sample.rows ]
sample.sigma <- post$sigma[ sample.rows ]
plot( sample.mu , sample.sigma , cex=0.5 , pch=16 , col=col.alpha(rangi2,0.1) )
smoothScatter( sample.mu , sample.sigma , cex=0.5 , pch=16 , col=col.alpha(rangi2,0.1) )
Not sure what the justification for this approach is. Are there limitations to this approach?
Sampling from the posterior is equivalent to generating a multivariate normal distribution using the mean vector and covariance matrix.
data(Howell1)
d <- Howell1
d2 <- d[ d$age >= 18 , ]
flist <- alist(
height ~ dnorm( mu , sigma ),
mu ~ dnorm( 178 , 20 ),
sigma ~ dunif( 0 , 50 )
)
start <- list(
mu=mean(d2$height),
sigma=sd(d2$height)
)
m4.1 <- quap(flist , data=d2 , start=start)
post_qap <- extract.samples( m4.1 , n=1e4 )
plot(post_qap, col=col.alpha(rangi2,0.1))
plot(density(post_qap$sigma), main="QAP v/s Grid Sampling")
lines(density(sample.sigma),lwd=2,lty=2, col="blue")
# Looks like relative error in the tails is large.
plot( d2$height ~ d2$weight )
Clearly, there’s a strong positive relationship b/w weight and height.
\[ height_i \sim Normal(\mu_i,\sigma) \\ \mu_i = \alpha + \beta(x_i- \bar{x}) \\ \alpha \sim Normal(178, 20) \\ \beta \sim Normal(0,10) \\ \sigma \sim Uniform(0,50) \]
McLerath sanith checks the priors for \(\beta\). We see that a Normal(0,10) priors results in a bunch of lines where height seems to decrease with average weight, something patently false as observed from our inspection. We revise our model to make \(\beta\) positive always.
\[\beta \sim LogNormal(0,1)\]
The log-normal distribution looks like this:
x_seq <- seq(from=-10, to = 10, length.out = 1000)
plot(x_seq,dlnorm(x_seq, 0 , 2),type="l",lwd=2)
lines(x_seq,dlnorm(x_seq, 0 , 1.5 ),col="blue",lwd=2)
lines(x_seq,dlnorm(x_seq, 0 , 1 ),col="red",lwd=3)
lines(x_seq,dlnorm(x_seq, 0 , 0.5 ),col="brown",lwd=2)
lines(x_seq,dlnorm(x_seq, 1 , 1 ),col="yellow",lwd=2)
So basically, the log normal distro assigns 0 probability to negative values, and has a very sharp probability spike around the mean value, subject to the dispersion parameter.
Grid search becomes unmanageably large real quick. Save this for later.
# library(rethinking)
# data(Howell1)
# d <- Howell1
# d2 <- d[ d$age >= 18 , ]
# define the average weight, x-bar
# xbar <- mean(d2$weight)
# sigma.list <- runif( 100 , 0 , 50 )
# alpha.list <- rnorm( 100 , 178 , 20 )
# beta.list <- rnorm( 100 , 0 , 10 )
# post <- expand.grid( alpha=alpha.list , beta=beta.list, xi_xbar = d2$height-xbar)
# post$mu_i <- post$alpha + post$beta*post$xi_xbar
# post_height <- expand.grid(mu_i = post$mu_i,sigma= sigma.list)
# post_height$LL <- sapply(1:nrow(post_height) ,
# function(i) sum(dnorm(d2$height, post_height$mu_i[i], post_height$sigma[i],log = TRUE )))
library(rethinking)
data(Howell1)
d <- Howell1
d2 <- d[ d$age >= 18 , ]
# define the average weight, x-bar
xbar <- mean(d2$weight)
# Fit the model
m4_3 <- quap(
alist(
height ~ dnorm(mu, sigma),
mu <- a + b*(weight-xbar),
a ~ dnorm(178,20),
b ~ dlnorm(0,1),
sigma ~ dunif(0,50)
),
data = d2
)
precis(m4_3)
## mean sd 5.5% 94.5%
## a 154.6013690 0.27030761 154.1693652 155.033373
## b 0.9032809 0.04192362 0.8362789 0.970283
## sigma 5.0718800 0.19115469 4.7663778 5.377382
coef(m4_3)
## a b sigma
## 154.6013690 0.9032809 5.0718800
round(vcov(m4_3),3)
## a b sigma
## a 0.073 0.000 0.000
## b 0.000 0.002 0.000
## sigma 0.000 0.000 0.037
We can now examine the posterior distribution we just generated.
post_m4_3 <- MASS::mvrnorm( n=1e4 , mu=coef(m4_3) , Sigma=vcov(m4_3) )
plot(density(post_m4_3[,"a"]))
Note that we are implicitly following the resampling approach to posterior probability for parameters. In other words, the relatively posterior probability of parameter values is encoded via the # of times that value appearss in our resampled vector. There is no separaate vector or function to generate posterior probaabilities.
\[
E(height) = E(Normal(\mu_i,\sigma)) = E(\mu_i) = E(\alpha + \beta(x-\bar{x}))
\] Since we have alpha and beta as independent (Check the covariance matrix for confirmation), we have \[
E(\alpha + \beta(x-\bar{x}) = E(\alpha) + E(\beta)*E(x) - E(E(\bar{x})) = E(\alpha)
\] Using the sample mean as the estimator, we get \(E(\alpha)\) = mean(post_m4_3[,"a"])
mean(post_m4_3[,"a"])
## [1] 154.6057
So, we want \(Variance(\mu_i|weight)\)
mu_at_weight <- function(weight, sample_matrix, avg_weight) {
sample_matrix[,"a"] + sample_matrix[,"b"]*(weight - avg_weight)
}
mu_at_50 <- mu_at_weight(50, post_m4_3, xbar)
plot(density(mu_at_50))
weight.seq <- seq( from=25 , to=70 , by=1 )
mu <- sapply( weight.seq , mu_at_weight, sample_matrix=post_m4_3, avg_weight=xbar )
## So each row in the mu matrix corresponds to a value of my calculated using the values in one sample from the posterior.
## Each column corresponds to one unique value from the sequence 25 to 70.
mu.mean <- apply( mu , 2 , mean ) ## We average over each column to obtain the average mu for a particular weight
mu.CI <- apply( mu , 2 , PI , prob=0.89 ) ## We also find the 89% CI for mu against each weight column
# use type="n" to hide raw data
plot( height ~ weight , d2 , type="n" )
# Sample a few rows
plot_rows <- sample(1:nrow(mu),1000)
# Loop over sampled rows, and plot the mu values in the row
for (i in plot_rows){
points( weight.seq , mu[i,] , pch=16 , col=col.alpha(rangi2,0.1))
}
plot(density(mu_at_50))
So this graph suggests that each of these clumps of points is a normal distribution with mean centered at \(\mu|weight\)
Here’s a small graph to show that the uncertainty in mu (average weight) is a function of distance from the mean weight (xbar). 30 and 60 are at the same distance from xbar ~ 15 kg. Their 89% CI has the same width.
mu_CI_width <- mu.CI[2,] - mu.CI[1,]
plot(weight.seq, mu_CI_width)
abline(h=mu_CI_width[which(weight.seq==60)])
abline(v=c(30,60))
We have \(height_i \sim Normal(\mu_i,\sigma)\). In the previous sections we generated \(\sigma\) using the \(\alpha\) and \(\beta\) values from the posterior. We will not use the same \(\alpha\) and \(\beta\) values along with \(\sigma\) values to simulate height
# Sampled posterior is in post_m4_3
# for each weight
# do:
# mu_vector = a + b*(weight - avg weight)
# sigma_vector = sigma
# generate 1000 draws from rnorm(mean=mu_vector, sd=sigma_vector)
weight_seq <- seq(from=25, to = 70, by = 1)
height_sim <- sapply(weight_seq, function(weight){
rnorm( n=1000, mean = post_m4_3[,"a"] + post_m4_3[,"b"]*(weight-xbar), sd=post_m4_3[,"sigma"])
})
height_mean <- apply(height_sim, 2, mean)
height_PI <- apply(height_sim, 2, rethinking::PI, prob=0.89)
plot( height ~ weight , d2 , col=col.alpha(rangi2,0.5) )
points(weight_seq, height_mean,pch=2)
shade(height_PI, weight_seq)
shade(mu.CI, weight_seq, col=col.alpha(rangi2,0.5))
e_alpha <- mean(post_m4_3[,"a"])
e_beta <- mean(post_m4_3[,"b"])
curve( e_alpha + e_beta*(x - xbar) , add=TRUE )
“But there’s nothing special about straight lines” I mean, there kinda is – constant slope. Which means that the relationship b/w the outcome and the predictor variable is constant across levels of the predictor variable.
Nothing fancy going on here, we just add higher powers of the predictor variable \(x^2, x^3\) etc. Makes it hard to interpret coefficients though. Also prone to overfitting.
The basic recipe stays the same: 1. Define a likelihood for the outcome variable. Typically the likelihood is some statistical distribution (eg Normal Distro with a mean and a variance parameter). 2. Link the parameters of the liklihood function to the predictor variables using a linear functional form. 3. Apply priors for the components of the parameters. 4. Fit the posterior distribution using Quadiatic Approximation 5. Re-sample from the posterior to obtain values of various parameters, and sub-parameters along with their relative probabilities. 6. Use the re-sampled parameter values to predict and draw inferences.
\[ height_i \sim Normal(\mu_i, sigma) \\ \mu_i = \alpha + \beta_1\hat{x_i} + \beta_2\hat{x_i}^2, \textrm{where} \space \hat{x_i} = x_i - \bar{x}\\ \alpha \sim Normal(178,20) \\ \beta_1 \sim LogNormal(0,1) \\ \beta_2 \sim Normal(0,10) \\ \sigma \sim Uniform(0,50) \]
library(rethinking)
data(Howell1)
d2 <- Howell1
# define the average weight, x-bar
xbar <- mean(d2$weight)
# define de-meaned weight and it's square
d2$weight_std = d2$weight - xbar
d2$weight_std_2 = d2$weight_std^2
# Fit the model
m4_4 <- quap(
alist(
height ~ dnorm(mu, sigma),
mu <- a + b_1*weight_std + b_2*weight_std_2,
a ~ dnorm(178,20),
b_1 ~ dlnorm(0,1),
b_2 ~ dnorm(0,10),
sigma ~ dunif(0,50)
),
data = d2
)
post_m4_4 <- MASS::mvrnorm( n=1e4 , mu=coef(m4_4) , Sigma=vcov(m4_4) )
In statistics, a spline is a smooth function built out of smaller, component functions.
Unlike polynomial regression, B-splines do not directly transform the predictor by squaring or cubing it. Instead they invent a series of entirely new, synthetic predictor variables. Each of these variables serves to gradually turn a specific parameter on and off within a specific range of the predictor variables. Each of these variables is called a basis function
library(rethinking)
data(cherry_blossoms)
d <- cherry_blossoms
Q: In the model definition below, which line is the likelihood? yi ∼ Normal(μ, σ) μ ∼ Normal(0, 10) σ ∼ Exponential(1)
Ans: Likelihoodn = P(data|parameters), y_i fits this description, and is hence the Likelihood.
Q: In the model definition just above, how many parameters are in the posterior distribution? Ans: 2 parameters, mu and sigma, both of which are first degree parameters without any sub-parameters
Q: Using the model definition above, write down the appropriate form of Bayes’ theorem that includes the proper likelihood and priors.
Ans: Fill in the values in the following formula
\[ Pr(\mu,\sigma|h) = \frac{\Pi_iNormal(h_i,\mu,\sigma)*Normal(\mu|0,10)*Exp(\sigma|1)}{\int_\mu\int_\sigma\Pi_iNormal(h_i,\mu,\sigma)*Normal(\mu|0,10)*Exp(\sigma|1)d\mu d\sigma} \]
Q: In the model definition below, which line is the linear model? yi ∼ Normal(μ, σ) μi =α+βxi α ∼ Normal(0, 10) β ∼ Normal(0, 1) σ ∼ Exponential(2)
Ans: A linear model relates a parameter as a linear function of predictor variables. The definition of \(\mu_i\) fits the bill.
Q: In the model definition just above, how many parameters are in the posterior distribution? Ans: 3 parameters, \(\alpha, \beta, \sigma\)
For the model definition below, simulate observed y values from the prior (not the posterior). yi ∼ Normal(μ, σ) μ ∼ Normal(0, 10) σ ∼ Exponential(1)
length <- 1000 #
mu_vector <- rnorm(length, mean = 0, sd = 10)
sigma_vector <- rexp(length, rate=1)
y <- rnorm(n=1e4, mean = mu_vector, sd = sigma_vector)
plot(density(y))
Translate the model just above into a quap formula.
Ans:
q4m2_model <- quap(
alist(
y ~ dnorm(mu, sigma),
mu <- dnorm(0, 10),
sigma ~ dexp(1)
),
data = data
)
Translate the quap model formula below into a mathematical model definition.
flist <- alist(
y ~ dnorm( mu , sigma ),
mu <- a + b*x,
a ~ dnorm( 0 , 10 ),
b ~ dunif( 0 , 1 ),
sigma ~ dexp( 1 )
)
Ans:
\[ y_i \sim Normal(\mu, \sigma) \\ mu = \alpha + \beta*x \\ \alpha \sim Normal(0,10) \\ \beta \sim Uniform(0,1) \\ \sigma \sim Exp(1) \]
Q: A sample of students is meassured for height each year for 3 years. After the third year, you want to fit a linear regression predicting hieght using year as a predictor. Write down the mathematical model definition for this regression using any variable names and prioirs you chose. Be prepared to defend your choice of prioirs
Ans:
\[ y_i \sim Normal(\mu, \sigma) \\ \mu = \alpha + \beta*year \\ \alpha \sim Normal(178,10) \\ \beta \sim LogNormal(0,1) \\ \sigma \sim Uniform(0,50) \]
Now suppose I remind you that every student got taller each year. Does this information lead you to change your choice of priors? How?
Ans: I’ll probably revise the LogNormal mean upwards from zero.
Now suppose I tell you that the variance among heights for students of the same age is never more than 64cm. How does this lead you to revise your priors?
Ans: Since variance <= 64, sd <= sqrt(64) = 8. I’ll revise my prior for sigma to Uniform(0,8)
The weights listed below were recorded in the !Kung census, but the heights were not recorded. Provide predicted heights and 89% intervals for these individuals. That is, fill in the table below, using model-based predictions.
Ans: The question effectively asks us to Predict hieghts and 89% PI for the following weights 46.95 43.72, 64.78, 32.59, 54.63
library(rethinking)
data(Howell1)
d2 <- Howell1
# plot(weight ~ age, data=d2)
# From this plot, given the weights given, we can safely assume that the data relates to adults.
d2 <- d2[ d2$age >= 18 , ]
# define the average weight, x-bar
xbar <- mean(d2$weight)
# define de-meaned weight and it's square
# Fit the model
q4h_1 <- quap(
alist(
height ~ dnorm(mu, sigma),
mu <- a + b*(weight - xbar),
a ~ dnorm(178,20),
b ~ dlnorm(0,1),
sigma ~ dunif(0,50)
),
data = d2
)
## Extract samples from the posterior
post_q4h_1 <- MASS::mvrnorm( n=1e4 , mu=coef(q4h_1) , Sigma=vcov(q4h_1) )
## Predict height for weights
weight_seq <- c(46.95, 43.72, 64.78, 32.59, 54.63)
height_sim <- sapply(weight_seq, function(weight){
rnorm( n=1000, mean = post_q4h_1[,"a"] + post_q4h_1[,"b"]*(weight-xbar), sd=post_q4h_1[,"sigma"])
})
height_mean <- apply(height_sim, 2, mean)
height_PI <- apply(height_sim, 2, rethinking::PI, prob=0.89)
filled_table <- cbind(weight_seq, height_mean, t(height_PI))
print(filled_table)
## weight_seq height_mean 5% 94%
## [1,] 46.95 156.2488 148.2603 164.2505
## [2,] 43.72 153.4574 145.3742 161.4983
## [3,] 64.78 172.3100 163.7962 180.5255
## [4,] 32.59 143.3379 135.1513 151.3215
## [5,] 54.63 163.2597 154.4084 171.4715
Select out all the rows in the Howell1 data with ages below 18 years of age. If you do it right, you should end up with a new data frame with 192 rows in it.
library(rethinking)
data(Howell1)
d2 <- Howell1
d2 <- d2[ d2$age < 18 , ]
nrow(d2) # 192 rows.
## [1] 192
# define the average weight, x-bar
xbar <- mean(d2$weight)
# define de-meaned weight and it's square
# Fit the model
q4h_2 <- quap(
alist(
height ~ dnorm(mu, sigma),
mu <- a + b*(weight - xbar),
a ~ dnorm(110,20),
b ~ dlnorm(0,1),
sigma ~ dunif(0,50)
),
data = d2
)
# Extract the posterior
post_q4h_2 <- MASS::mvrnorm( n=1e4 , mu=coef(q4h_2) , Sigma=vcov(q4h_2) )
Ans (a)
coef(q4h_2)
## a b sigma
## 108.316870 2.716883 8.438689
From the coef table, we see that on average a 10 point increase in weight corresponds to a 27 point increase in height.
# Calculate the 89% interval for mean height and predicted heights
weight_seq <- seq(from=1, to=50, by =1)
mean_height_sim <- sapply(weight_seq, function(weight){
post_q4h_2[,"a"] + post_q4h_2[,"b"]*(weight-xbar)})
mean_height_CI <- apply(mean_height_sim, 2, rethinking::PI, prob=0.89)
mean_height_PI <- apply(mean_height_sim,2,rethinking::PI,prob=0.89)
mean_height_CI_width <- mean_height_CI[2,]- mean_height_CI[1,]
plot(weight_seq, mean_height_CI_width)
# Calculate the actual heights and the 89% interval for heights
height_sim <- sapply(weight_seq, function(weight)
rnorm(1000, mean=post_q4h_2[,"a"] + post_q4h_2[,"b"]*(weight-xbar), sd=post_q4h_2[,"sigma"]))
height_PI <- apply(height_sim,2,rethinking::PI,prob=0.89)
height_PI_width <- height_PI[2,]- height_PI[1,]
plot(weight_seq, height_PI_width)
plot(height ~ weight, data=d2) # Let's pick something like 110 as mean height with sd = 20
curve(mean(post_q4h_2[,"a"]) + mean(post_q4h_2[,"b"])*(x-xbar), from=0, to = 40, n = 50, add = TRUE)
shade(mean_height_PI, weight_seq)
shade(height_PI, weight_seq)
Ans: The rate of increas in height with increas in weight seems to flatten out at higher weights. Moving from 10 kg to 20 kg weight results in an increase in height of ~30 cms. While moving from 30 kg to 40 kg results in height increase of only around 5 cm. In other words, height is a concave function of weight, not a linear function.
Recommendation would be to model average height as a concave function of weight.
Suppose a colleague of yours, who works on allometry, glances at the practice problems just above. Your colleague exclaims, “That’s silly. Everyone knows that it’s only the logarithm of body weight that scales with height!” Let’s take your colleague’s advice and see what happens.
Can you interpret the resulting estimates?
library(rethinking)
data(Howell1)
d2 <- Howell1
d2 <- d2[ d2$age < 18 , ]
# Create a variable to hold the log of weight
d2$log_weight <- log(d2$weight)
# Generate the posterior with QUAP
q4h_3 <- quap(
alist(
height ~ dnorm(mu, sigma),
mu <- a + b*(log_weight),
a ~ dnorm(30,20),
b ~ dlnorm(0,1),
sigma ~ dunif(0,50)
),
data = d2
)
# Sample from the posterior
post_q4h_3 <- MASS::mvrnorm( n=1e4 , mu=coef(q4h_3) , Sigma=vcov(q4h_3) )
round(coef(q4h_3),3)
## a b sigma
## -32.163 50.289 4.657
Interpretation: We know that beta is
\[ \beta = \frac{\Delta height}{\Delta log weight} = \frac{\Delta height}{\frac{\Delta weight}{weight}} \\ \beta = weight * \frac{\Delta height}{\Delta weight} \\ \implies \Delta height = \beta *\frac{\Delta weight}{weight} \\ \textrm{In other words, the impact of unit increase in weight is lower at higher weights. } \\ \textrm{A 10kg increase in weight will result in a 4x higher increase in hight at 15 kg weight, compared to 60 kg weight} \\ \textrm{Thus, taking the log of a predictor variable dampens the effect of higher values of that variable} \]
summary(d2$log_weight)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.447 2.460 2.832 2.794 3.153 3.801
weight_seq <- seq(from=1.4, to = 3.9, length.out = 100)
# Predicting mean height
mean_height_sim <- sapply(weight_seq, function(weight)
post_q4h_3[,"a"]+post_q4h_3[,"b"]*weight)
mean_height <- apply(mean_height_sim, 2, mean)
mean_height_CI <- apply(mean_height_sim, 2, rethinking::PI, prob=0.89)
# Predicting actual heights
height_sim <- sapply(weight_seq, function(weight)
rnorm(n=1000, mean=post_q4h_3[,"a"]+post_q4h_3[,"b"]*weight, sd=post_q4h_3[,"sigma"]))
height_CI <- apply(height_sim, 2, rethinking::PI, prob=0.89)
plot( height ~ weight , data=d2 , col=col.alpha(rangi2,0.4) )
Then use samples from the quadratic approximate posterior of the model in (a) to superimpose on the plot: (1) the predicted mean height as a function of weight, (2) the 97% interval for the mean, and (3) the 97% interval for predicted heights.
plot( height ~ weight , data=d2 ,
col=col.alpha(rangi2,0.4) )
lines(x=exp(weight_seq), y=mean_height)
shade(mean_height_CI,exp(weight_seq))
shade(height_CI,exp(weight_seq))
Plot the prior predictive distribution for the polynomial regression model in the chapter. You can modify the code that plots the linear regression prior predictive distribution.
Can you modify the prior distributions of α, β1, and β2 so that the prior predictions stay within the biologically reasonable outcome space? That is to say: Do not try to fit the data by hand. But do try to keep the curves consistent with what you know about height and weight, before seeing these exact data.
Since a, b1,b2,b3, and sigma are independent, we’ll predit from prior using their means. Otherise, creating the cartesian product of all these parameters will blow up my computer. So, we’ll simply take the average of these parameters and vary weights to test our predictive fits.
Or maybe, we could take quantiless, and use quantiles to create an approximation grid?
length <- 1000
a <- rnorm( n=length, mean=100 , sd=20 )
b1 <- rlnorm(n=length, mean=0 , sd=1 )
b2 <- rnorm(n=length, mean=-0.75 , sd=1)
b3 <- rnorm(n=length, mean=1 , sd=1 )
sigma <- runif(n=length, min=0 , max=10 )
weight_seq <- seq(from=1, to=100, by =1)
xbar <- mean(weight_seq)
weight_s <- (weight_seq - xbar)/sd(weight_seq)
weight_s2 <- weight_s^2
weight_s3 <- weight_s^3
#mu <- a + b1*weight_s + b2*weight_s2 + b3*weight_s3
height_sim <- sapply(weight_s, function(weight_s)
rnorm(n=100, mean=mean(a) + mean(b1)*weight_s + mean(b2)*weight_s^2 +
mean(b3*weight_s^3, sd=mean(sigma))))
mean_height_sim <- apply(height_sim, 2, mean)
plot(weight_seq, mean_height_sim)
# for(i in 1:100){
# plot(weight_seq, height_sim[i,])
# }
library(rethinking)
data(Howell1)
d2 <- Howell1
plot(height ~ weight, data=d2)
library(rethinking)
data(Howell1)
d2 <- Howell1
# define the average weight, x-bar
xbar <- mean(d2$weight)
# define de-meaned weight and it's square
d2$weight_std = d2$weight - xbar
# Fit the model
overthink_1 <- quap(
alist(
height ~ dnorm(mu, sigma),
mu <- a + b*weight_std ,
a ~ dnorm(178,20),
b ~ dlnorm(0,1),
sigma ~ dunif(0,50)
),
data = d2
)
overthink_1 <- MASS::mvrnorm( n=1e4 , mu=coef(overthink_1) , Sigma=vcov(overthink_1) )
weight_seq <- seq(from=1, to = 100, by = 1)
mean_height_sim <- sapply(weight_seq, function(weight)
overthink_1[,"a"] + overthink_1[,"b"]*(weight-xbar)
)
mean_height_mean <- apply(mean_height_sim,2,mean)
mean_height_CI <- apply(mean_height_sim,2,rethinking::PI, prob=0.89)
mean_height_CI_width <- mean_height_CI[2,] - mean_height_CI[1,]
predict_height_sim <- sapply(weight_seq, function(weight)
rnorm(n=1000, mean = overthink_1[,"a"] + overthink_1[,"b"]*(weight-xbar), sd= overthink_1[,"sigma"]))
predict_height_CI <- apply(predict_height_sim, 2, rethinking::PI, prob=0.89)
predict_height_CI_width <- predict_height_CI[2,] - predict_height_CI[1,]
In plots like the one below, which show the 89% PI for predicted heights, and mean heights, why is the uncertainty interval for predicted heights have constant width at all weight levels?
plot(height ~ weight, data=d2)
lines(weight_seq, mean_height_mean)
shade(mean_height_CI, weight_seq)
shade(predict_height_CI, weight_seq)
plot(weight_seq, predict_height_CI_width)
boxplot(predict_height_CI_width ~ cut(weight_seq,10))
abline(h=mean(predict_height_CI_width), lty=3)